<?php

/**
 * Walk Sogou web-search result pages for one task and submit a record for
 * every result whose snapshot page could be fetched and processed.
 *
 * For each page 1..$tasks['pn'] the function:
 *   1. fetches the search page for $tasks['search'];
 *   2. stops the whole script (writing "End" to $filepath) if the response was
 *      redirected to an "antispider" URL;
 *   3. extracts every "<cite id="cacheresult_info_...快照" fragment;
 *   4. skips fragments whose domain matches the global $whitelist pattern;
 *   5. extracts the snapshot link, downloads it, passes the flattened HTML to
 *      codeWords() and POSTs the resulting record to $posturl.
 *
 * Relies on helpers defined outside this file: getheader(), Get_Web_Contents(),
 * print_f() and codeWords().
 * NOTE(review): codeWords() presumably returns an array with 'count' and
 * 'snapshot' keys, or an empty value when nothing matched — confirm.
 *
 * @param array  $tasks Task description; the keys read here are 'pn' (number of
 *                      pages to scan), 'search' (query), 'id' and 'name'.
 * @param string $host  Host passed to getheader() for the search request. The
 *                      search URL itself is always built on www.sogou.com.
 * @return void
 */
function sogoupc($tasks,$host='www.sogou.com'){

    global $whitelist;  // whitelist pattern, interpolated into a regex below
    global $posturl;    // endpoint every record is POSTed to
    global $filepath;   // switch file; receives "End" when a captcha shows up

    $nul = 0;   // empty snapshot responses since the last back-off

    // One iteration per search result page.
    for($pn = 0;$pn < $tasks['pn'];$pn++){

        $header = getheader($host);

        $searchUrl = "http://www.sogou.com/web?query=".urlencode($tasks['search'])."&page=".($pn+1);

        echo $searchUrl."\n";

        $contents = Get_Web_Contents($searchUrl,'GET','','',$header); // search result page

        // A redirect to the anti-spider page means a captcha is required:
        // flag the switch file and stop.
        $finalUrl = isset($contents['Info']['url']) ? $contents['Info']['url'] : '';
        if(strstr($finalUrl,'antispider')) {
            print_f("搜狗验证码");
            file_put_contents($filepath,"End");
            exit;
        }

        $body = isset($contents['Body']) ? $contents['Body'] : '';

        // Collapse the page to a single line so the regexes can span tags.
        $text = str_replace(array("\r\n", "\r", "\n"), "",$body);

        // Drop inline CSS and JavaScript.
        $text = preg_replace("/<(style|script)(.*?)<\/(style|script)>/i", "", $text);

        // One match per search record: from the <cite> element to the snapshot label.
        $preg_rute = '/(<cite id="cacheresult_info_(.*?)快照)/';
        preg_match_all($preg_rute,$text,$content);

        foreach($content[0] as $k => $v){

            // Skip records whose displayed domain is whitelisted.
            if(preg_match("/>([^\/]+\.)?({$whitelist})\//",$v)){
                echo "whitelist \n";
                continue;
            }

            // Locate the snapshot link. The first pattern captures everything
            // after the leading "h" of the URL, so the "h" is put back here.
            $snapshotUrl = '';
            if(preg_match('/" href="h(.*?)\"/',$v,$match) && !empty($match[1])){
                $snapshotUrl = 'h'.$match[1];
            }elseif(preg_match('/href="(.*?)id="sogou_snapshot_/',$v,$match)){
                // Fallback: the capture runs up to the id attribute, so it still
                // contains the closing quote of href (and anything after it).
                // Keep only the attribute value; it already starts with its scheme.
                $quote = strpos($match[1],'"');
                $snapshotUrl = ($quote === false) ? trim($match[1]) : substr($match[1],0,$quote);
            }

            // Undo HTML entity encoding of "&" in the link ("&amp;" -> "&").
            $snapshotUrl = str_replace('amp;','',$snapshotUrl);

            // No usable link in this record: nothing to fetch.
            if($snapshotUrl === ''){
                continue;
            }

            // Separate variable so the search-page header is not clobbered.
            $snapshotHeader = getheader('snapshot.sogoucdn.com');

            $finalsnapshot = Get_Web_Contents($snapshotUrl,'GET','','',$snapshotHeader);

            // Empty snapshot: log the error and, after more than 10 of them,
            // pause for 10 minutes before carrying on.
            if(empty($finalsnapshot['Body'])){
                $nul++;
                print_f((isset($finalsnapshot['Error']) ? $finalsnapshot['Error'] : '')."\n");
                if($nul>10){
                    $nul = 0;
                    sleep(600);
                }
                continue;
            }

            // Strip all line breaks, spaces and tabs from the snapshot HTML.
            $snapshot = str_replace(array("\r\n", "\r", "\n"," ","\t"), "",$finalsnapshot['Body']);

            // Convert to UTF-8, detecting the source among the listed encodings.
            $snapshot = mb_convert_encoding($snapshot, 'UTF-8',array("ASCII","GB2312","GBK",'BIG5'));

            // Page title; empty string when the snapshot has no <title>.
            preg_match('/<title(.*?)>(.*?)<\/title>/i',$snapshot,$data);
            $title = isset($data[2]) ? $data[2] : '';

            $hits = codeWords($snapshot); // keyword matching / highlighting

            if(!empty($hits)){

                echo $k;
                print_f($title."\n");

                // Build a fresh record for every hit so nothing leaks between records.
                $insert = array();
                $insert['tasks_id'] = $tasks['id'];
                $insert['tasks_name'] = $tasks['name'];
                $insert['search'] = $tasks['search'];
                $insert['keyword'] = $hits['count'];
                $insert['url'] = $snapshotUrl;
                $insert['title'] = $title;
                $insert['pn'] = $pn + 1;   // 1-based page number
                $insert['po'] = $k + 1;    // 1-based position within the page
                $insert['status'] = 1;
                $insert['addtime'] = time();
                $insert['snapshot'] = $hits['snapshot'];

                // Submit the record.
                Get_Web_Contents($posturl, 'POST', $insert);

            }else{
                print_f("未命中\n") ;
            }

        }
    }
}

?>
